package main;

import cn.hutool.core.io.FileUtil;
import cn.hutool.json.JSONUtil;
import entity.Pubmed;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import utils.PubmedUtils2;
import utils.SmallTool;

import java.util.ArrayList;
import java.util.List;

/**
 * Command-line entry point that reads an XML file, collects its
 * {@code PubmedArticle} and {@code PubmedBookArticle} elements, converts them to
 * {@link Pubmed} entities via {@link PubmedUtils2#parseHtml} and hands the JSON
 * representation to {@link SmallTool#printMess}.
 *
 * @Author: 葛成瑞
 * @Date: 2021/4/10 9:54
 */
public class ReadXml {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_XML_PATH = "F:\\pmxml\\33636963,33636964.xml";

    /**
     * Reads the XML file, strips reference lists, and prints the parsed articles as JSON.
     *
     * @param args optional; when {@code args[0]} is present it is used as the path of the
     *             XML file to read, otherwise {@link #DEFAULT_XML_PATH} is used
     */
    public static void main(String[] args) {
        // Fall back to the historical hard-coded file so running without arguments is unchanged.
        String xmlPath = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
        String content = FileUtil.readUtf8String(xmlPath);

        // Deliberately kept on Jsoup's default parser: the downstream helper is named
        // parseHtml and may rely on the DOM shape this parser produces.
        Document doc = Jsoup.parse(content);

        // Remove reference lists so that DOIs of cited works are not mixed up with the
        // article's own DOI. select() returns an empty set when nothing matches, so no
        // exception handling is needed here.
        doc.select("ReferenceList").remove();

        // Regular articles first, then book articles, preserving the original ordering.
        List<Element> divList = new ArrayList<>();
        divList.addAll(doc.select("PubmedArticle"));
        divList.addAll(doc.select("PubmedBookArticle"));

        List<Pubmed> pubmeds = PubmedUtils2.parseHtml(divList);
        SmallTool.printMess(JSONUtil.toJsonStr(pubmeds));
    }
}
